% scribe: Saurabh Amin
% lastupdate: Oct. 2, 2005
% lecture: 2
% references: Durrett, sections 1.1 and 1.2
% title: Random variables and their distributions
% keywords: measurable, random variable, sigma-field, generating sigma-field, Borel sigma-field, product sigma-field, product space, extended real random variable, e.r.r.v., r.v., simple random variable, cdf, cumulative distribution function, distribution
% end
\documentclass[12pt,letterpaper]{article}
\include{macros}
\begin{document}
\lecture{2}{Random variables and their distributions}{Saurabh Amin}
{amins@berkeley.edu}

\section{Random variables}
% keywords: measurable, random variable
% end
Let $(\Omega,\mathcal{F})$ and $(S, \mathcal{S})$ be two measurable spaces. A map $X:\Omega\rightarrow S$ is \emph{measurable}, or a \emph{random variable} (denoted r.v.), if
\begin{align*}
X^{-1}(A)\equiv\{\omega: X(\omega)\in A\}\in\mathcal{F} \text{ for all } A\in\mathcal{S}.
\end{align*}
One writes $\{X \in A\}$ or $(X\in A)$ as shorthand for $\{\omega: X(\omega)\in A\}=X^{-1}(A)$.
If $(S,\mathcal{S})=(\R^d,\mathcal{R}^d)$, then $X$ is called a $d$-dimensional random vector. Here $\mathcal{R}^d$ is the Borel $\sigma$-field on $\R^d$, i.e., the $\sigma$-field generated by the open subsets of $\R^d$.

An \emph{indicator function} is a classic example of a r.v., with $S=\{0,1\}$ and $\mathcal{S}$ the collection of all subsets of $S$. The indicator function of a set $F\in\mathcal{F}$ is defined as
\begin{align*}
1_{F}(\omega)=
\begin{cases}
1 & \mbox{if $\omega\in F$}\\
0 & \mbox{if $\omega\notin F$}
\end{cases}
\end{align*}
If $S=\Omega$, then the identity map on $\Omega$ is a r.v.\ iff $\mathcal{S}\subset\mathcal{F}$.

\emph{Fact}: The composition of two measurable maps is measurable.

\section{Generation of $\sigma$-fields}
% keywords: sigma-field, generating sigma-field, Borel sigma-field, product sigma-field, product space
% end
Let $\mathcal{A}$ be a collection of subsets of $\Omega$. The $\sigma$-field generated by $\mathcal{A}$, denoted by $\sigma(\mathcal{A})$, is the smallest $\sigma$-field on $\Omega$ which contains $\mathcal{A}$.

Let $(X_i, i\in I)$ be a family of mappings of $\Omega$ into measurable spaces $(S_i,\mathcal{S}_i)$, $i\in I$. Here, $I\neq\emptyset$ is an arbitrary index set (i.e., possibly uncountable). The $\sigma$-field generated by $(X_i, i\in I)$, denoted by $\sigma(X_i, i\in I)$, is the smallest $\sigma$-field on $\Omega$ with respect to which each $X_i$ is measurable. If we take $\mathcal{A}=\mathop{\bigcup}_i X_i^{-1}(\mathcal{S}_{i})$, where $X_i^{-1}(\mathcal{S}_i)=\{X_i^{-1}(B): B\in\mathcal{S}_i\}$, this case reduces to the previous one. In both of the above cases, ``smallest'' means the intersection of all $\sigma$-fields with the given property (such an intersection is again a $\sigma$-field).

We now introduce product spaces and product $\sigma$-fields. Given $(S,\mathcal{S})$ and an index set $I$, let $\Omega=\prod_{i\in I}S_i=\{(\omega_{i}, i\in I):\omega_i \in S_i\}$, where each $(S_i,\mathcal{S}_i)$ is a copy of $(S,\mathcal{S})$. For $\omega=(\omega_i, i\in I)\in\Omega$, define the projection maps $X_i:\Omega\rightarrow S_i$ by $X_i(\omega)=\omega_i$. The product $\sigma$-field $\mathcal{F}$ on $\Omega$ is the $\sigma$-field generated by the projections, i.e., $\mathcal{F}=\sigma\bigl(X_i^{-1}(B): i\in I,\ B\in\mathcal{S}_i\bigr)=\sigma(X_i, i\in I)$.
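
\emph{Example}: As a simple illustration of a generated $\sigma$-field, consider a single indicator $1_F$ for some $F\in\mathcal{F}$, viewed as a map into $(\{0,1\},\mathcal{S})$ with $\mathcal{S}$ the collection of all subsets of $\{0,1\}$. Its preimages are $1_F^{-1}(\emptyset)=\emptyset$, $1_F^{-1}(\{1\})=F$, $1_F^{-1}(\{0\})=F^{c}$, and $1_F^{-1}(\{0,1\})=\Omega$, so
\begin{align*}
\sigma(1_F)=\{\emptyset, F, F^{c}, \Omega\},
\end{align*}
the smallest $\sigma$-field on $\Omega$ containing $F$.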

\section{Checking measurability}
% keywords: measurable, random variable, sigma-field, generating sigma-field
% end
\begin{theorem}
Let $(\Omega,\mathcal{F})$ be a measurable space and $X: \Omega \rightarrow S$. If $S$ is equipped with the $\sigma$-field $\sigma(\mathcal{A})$ for an arbitrary collection $\mathcal{A}$ of subsets of $S$, then $X$ is measurable iff $\{X\in A\}\in \mathcal{F}$ for all $A\in \mathcal{A}$.
\end{theorem}
\begin{proof}
The forward direction is immediate, since $\mathcal{A}\subset\sigma(\mathcal{A})$. For the reverse direction, recall that $\{X\in A\}=\{\omega: X(\omega)\in A\}=X^{-1}(A)$ and that taking preimages commutes with the set operations:
\begin{align*}
X^{-1}(A^c)=(X^{-1}(A))^c \\
X^{-1}\left(\mathop{\bigcup}_{i}A_i\right)=\mathop{\bigcup}_i X^{-1}(A_i)\\
X^{-1}\left(\mathop{\bigcap}_{i}A_i\right)=\mathop{\bigcap}_i X^{-1}(A_i)
\end{align*}
Hence the collection $\mathcal{C} = \{B\subset S: X^{-1}(B)\in \mathcal{F}\}$ is a $\sigma$-field on $S$. It contains $\mathcal{A}$ by hypothesis, and therefore contains $\sigma(\mathcal{A})$, the smallest $\sigma$-field containing $\mathcal{A}$. Thus $X^{-1}(B)\in\mathcal{F}$ for every $B\in\sigma(\mathcal{A})$, i.e., $X$ is measurable. (The same identities also show that $X^{-1}(\sigma(\mathcal{A}))=\sigma(X^{-1}(\mathcal{A}))$.)
\end{proof}
Similarly, if $S$ has the $\sigma$-field $\sigma(Y_i, i\in I)$, then $X$ is measurable iff each $Y_i\circ X$ is measurable.

\section{Real and extended real random variables}
% keywords: extended real random variable, e.r.r.v., r.v., simple random variable
% end
Let $S$ be a metric or topological space. The \emph{Borel $\sigma$-field} on $S$, denoted by $\mathcal{B}(S)$, is the $\sigma$-field generated by the open subsets of $S$. If $f:S\rightarrow T$ is a continuous function, then $f$ is measurable from $(S,\mathcal{B}(S))$ to $(T,\mathcal{B}(T))$ by the previous theorem, since $f^{-1}(U)$ is open (hence Borel) for every open $U\subset T$. If $(S,\mathcal{S})=(\R,\mathcal{R})$, some possible choices of the generating collection $\mathcal{A}$ are $\{(-\infty,x]:x\in \R\}$ or $\{(-\infty,x):x\in \Q\}$, where $\Q$ denotes the rationals. For the real line $\R=(-\infty,\infty)$ and the extended real line $\bar{\R}=[-\infty,\infty]$, the Borel $\sigma$-fields can be defined as follows.
\begin{align*}
\mathcal{B}(\R)=\sigma\{(-\infty,x], x\in \R\}\\
\mathcal{B}(\bar{\R})=\sigma\{[-\infty,x], x\in\bar{\R}\}
\end{align*}
\begin{definition}[Real Random Variable]
Let $(\Omega,\mathcal{F})$ be a measurable space. A real random variable (r.r.v.) is a measurable map from $\Omega$ to $\R$.
\end{definition}
Thus a function $X$ with range $\R$ is a r.v.\ iff $(X\leq x)\in\mathcal{F}$ for all $x\in\R$ (by Theorem 2.1). Similarly, extended real random variables (e.r.r.v.) are defined as measurable maps from $\Omega$ to $\bar{\R}$.

Operations on real numbers are performed pointwise on real-valued functions, e.g.,
\begin{align*}
Z=X+Y \mbox{ means } Z(\omega)=X(\omega)+Y(\omega) \mbox{ for all $\omega\in\Omega$,}\\
\mbox{and }Z=\lim_{n}Z_n \mbox{ means } Z(\omega)=\lim_{n}Z_n(\omega) \mbox{ for all $\omega\in\Omega$.}
\end{align*}
\emph{Notation for real numbers}: $x\vee y=\max(x,y)$, $x\wedge y=\min(x,y)$, $x^{+}=x\vee 0$, $x^{-}=-(x\wedge 0)$. Note that $\left|x\right|=x^{+}+x^{-}$ and $x=x^{+}-x^{-}$.
\begin{theorem}
If $X_1, X_2,\ldots$ are e.r.r.v.'s on $(\Omega, \mathcal{F})$, then
\begin{align*}\inf_n{X_n}\text{, }\sup_n{X_n}\text{, }\liminf_n{X_n}\text{, }\limsup_n{X_n}\end{align*}
are also e.r.r.v.'s; that is, the class of e.r.r.v.'s is closed under these limiting operations.
\end{theorem}
\begin{proof}
Since the infimum of a sequence is less than $x$ iff some term of the sequence is less than $x$, and the supremum is at most $x$ iff every term is at most $x$, we have
\begin{align*}
\{\inf_n X_n< x\}=\mathop{\bigcup}_n\{X_n<x\}\in\mathcal{F} \quad\text{and}\quad \{\sup_n X_n\leq x\}=\mathop{\bigcap}_n\{X_n\leq x\}\in\mathcal{F}
\end{align*}
for all $x\in\R$. Since the sets $[-\infty,x)$, $x\in\R$, also generate $\mathcal{B}(\bar{\R})$, Theorem 2.1 shows that $\inf_n X_n$ and $\sup_n X_n$ are e.r.r.v.'s. The remaining two follow from $\liminf_n X_n=\sup_{n}\inf_{m\geq n}X_m$ and $\limsup_n X_n=\inf_{n}\sup_{m\geq n}X_m$.
\end{proof}
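
\emph{Example}: A typical use of this theorem is to show that the set on which a sequence of e.r.r.v.'s converges is measurable. Indeed,
\begin{align*}
\{\omega: \lim_n X_n(\omega) \text{ exists in } \bar{\R}\}=\{\omega: \liminf_n X_n(\omega)=\limsup_n X_n(\omega)\},
\end{align*}
and since $\liminf_n X_n\leq\limsup_n X_n$ always, the complement of this set is
\begin{align*}
\{\liminf_n X_n<\limsup_n X_n\}=\mathop{\bigcup}_{q\in\Q}\Bigl(\{\liminf_n X_n<q\}\cap\{q<\limsup_n X_n\}\Bigr)\in\mathcal{F}.
\end{align*}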

\section{Distributions}
% keywords: cdf, cumulative distribution function, distribution
% end
If $X$ is a r.r.v.\ on a probability space $(\Omega,\mathcal{F},\P)$, its \emph{distribution} (or probability law) is the probability measure $\mu$ on $(\R,\mathcal{B}(\R))$ defined by $\mu(A)=\P(X\in A)$ for $A\in\mathcal{B}(\R)$, and its \emph{cumulative distribution function} (cdf) is $F(x)=\P(X\leq x)$. A cdf is nondecreasing and right continuous, with $\lim_{x\rightarrow-\infty}F(x)=0$ and $\lim_{x\rightarrow\infty}F(x)=1$. Conversely, any function $F$ with these properties is the cdf of some r.v. To construct such a r.v., take $\Omega=(0,1)$, $\mathcal{F}=\mathcal{B}((0,1))$, $\P=$ Lebesgue measure, and define
\begin{align*}
X^{+}(\omega):=\inf\{z:F(z)>\omega\}=\sup\{y:F(y)\leq\omega\}\\
X^{-}(\omega):=\inf\{z:F(z)\geq\omega\}=\sup\{y:F(y)<\omega\}
\end{align*}
\begin{figure}
\centering
\includegraphics[width=5in]{fig1}
\caption{An illustration of some of the important cases to consider in the construction of $X^-$ and $X^+$.}
\label{fig}
\end{figure}
Figure \ref{fig} shows the cases to consider carefully. By definition of $X^{-}$, $(\omega\leq F(c))\Rightarrow(X^{-}(\omega)\leq c)$. Conversely, $(z>X^{-}(\omega))\Rightarrow(F(z)\geq\omega)$, and so by right continuity of $F$, $(X^{-}(\omega)\leq c) \Rightarrow (\omega\leq F(X^{-}(\omega))\leq F(c))$. Thus $(\omega\leq F(c))\Leftrightarrow(X^{-}(\omega)\leq c)$, so that $\P(X^{-}\leq c)=F(c)$. The variable $X^{-}$ therefore has distribution function $F$, and we call its probability law $\mathcal{L}$. Here, $\mathcal{L}$ is the unique probability measure on $(\R,\mathcal{B})$ such that $\mathcal{L}((-\infty,x])=F(x)$ for all $x$. Now, by definition of $X^{+}$, $(\omega<F(c))\Rightarrow(X^{+}(\omega)\leq c)$, while $(z>c\geq X^{+}(\omega))\Rightarrow(F(z)>\omega)$, so right continuity gives $(X^{+}(\omega)\leq c)\Rightarrow(\omega\leq F(c))$. Since $\P(\{\omega: \omega=F(c)\})=0$, it follows that $\P(X^{+}\leq c)=F(c)$ as well, so $X^{+}$ also has distribution function $F$. In fact $X^{-}\leq X^{+}$, and the two differ only on the (at most countable) set of $\omega$ for which $F$ takes the value $\omega$ on a nondegenerate interval, so $\P(X^{-}=X^{+})=1$.
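
\emph{Example}: To see where $X^{-}$ and $X^{+}$ can differ, take the two-point distribution with $F(x)=0$ for $x<0$, $F(x)=1/2$ for $0\leq x<1$, and $F(x)=1$ for $x\geq 1$. Then
\begin{align*}
X^{-}(\omega)=\begin{cases}0 & \mbox{if $0<\omega\leq 1/2$}\\ 1 & \mbox{if $1/2<\omega<1$}\end{cases}
\qquad
X^{+}(\omega)=\begin{cases}0 & \mbox{if $0<\omega< 1/2$}\\ 1 & \mbox{if $1/2\leq\omega<1$}\end{cases}
\end{align*}
so $X^{-}$ and $X^{+}$ disagree only at $\omega=1/2$, a Lebesgue-null set, and both take the values $0$ and $1$ with probability $1/2$ each, as prescribed by $F$.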